import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import Image
# Wrap the local PNG in an IPython Image object.
image_path = "C:/Users/Elif/Desktop/spt.png"
Image(image_path)

# Read the CSV export into a DataFrame and preview its first rows.
csv_path = r"C:\Users\Elif\Desktop\Bitirme Projesi\asset-v1 Kodlasam+VAM-03+2023_02+type@asset+block@asset-v1_Kodlasam_MS01_2023_06_type_asset_block_Spotify_Youtube.csv"
df = pd.read_csv(csv_path)
df.head()
| Unnamed: 0 | Artist | Url_spotify | Track | Album | Album_type | Uri | Danceability | Energy | Key | ... | Url_youtube | Title | Channel | Views | Likes | Comments | Description | Licensed | official_video | Stream | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | Gorillaz | https://open.spotify.com/artist/3AA28KZvwAUcZu... | Feel Good Inc. | Demon Days | album | spotify:track:0d28khcov6AiegSCpG5TuT | 0.818 | 0.705 | 6.0 | ... | https://www.youtube.com/watch?v=HyHNuVaZJ-k | Gorillaz - Feel Good Inc. (Official Video) | Gorillaz | 693555221.0 | 6220896.0 | 169907.0 | Official HD Video for Gorillaz' fantastic trac... | True | True | 1.040235e+09 |
| 1 | 1 | Gorillaz | https://open.spotify.com/artist/3AA28KZvwAUcZu... | Rhinestone Eyes | Plastic Beach | album | spotify:track:1foMv2HQwfQ2vntFf9HFeG | 0.676 | 0.703 | 8.0 | ... | https://www.youtube.com/watch?v=yYDmaexVHic | Gorillaz - Rhinestone Eyes [Storyboard Film] (... | Gorillaz | 72011645.0 | 1079128.0 | 31003.0 | The official video for Gorillaz - Rhinestone E... | True | True | 3.100837e+08 |
| 2 | 2 | Gorillaz | https://open.spotify.com/artist/3AA28KZvwAUcZu... | New Gold (feat. Tame Impala and Bootie Brown) | New Gold (feat. Tame Impala and Bootie Brown) | single | spotify:track:64dLd6rVqDLtkXFYrEUHIU | 0.695 | 0.923 | 1.0 | ... | https://www.youtube.com/watch?v=qJa-VFwPpYA | Gorillaz - New Gold ft. Tame Impala & Bootie B... | Gorillaz | 8435055.0 | 282142.0 | 7399.0 | Gorillaz - New Gold ft. Tame Impala & Bootie B... | True | True | 6.306347e+07 |
| 3 | 3 | Gorillaz | https://open.spotify.com/artist/3AA28KZvwAUcZu... | On Melancholy Hill | Plastic Beach | album | spotify:track:0q6LuUqGLUiCPP1cbdwFs3 | 0.689 | 0.739 | 2.0 | ... | https://www.youtube.com/watch?v=04mfKJWDSzI | Gorillaz - On Melancholy Hill (Official Video) | Gorillaz | 211754952.0 | 1788577.0 | 55229.0 | Follow Gorillaz online:\nhttp://gorillaz.com \... | True | True | 4.346636e+08 |
| 4 | 4 | Gorillaz | https://open.spotify.com/artist/3AA28KZvwAUcZu... | Clint Eastwood | Gorillaz | album | spotify:track:7yMiX7n9SBvadzox8T5jzT | 0.663 | 0.694 | 10.0 | ... | https://www.youtube.com/watch?v=1V_xRb0x9aw | Gorillaz - Clint Eastwood (Official Video) | Gorillaz | 618480958.0 | 6197318.0 | 155930.0 | The official music video for Gorillaz - Clint ... | True | True | 6.172597e+08 |
5 rows × 28 columns
# Report the dataset dimensions as a (rows, columns) tuple.
df.shape
(20718, 28)
# Print each column's name, non-null count and dtype, plus memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 20718 entries, 0 to 20717 Data columns (total 28 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Unnamed: 0 20718 non-null int64 1 Artist 20718 non-null object 2 Url_spotify 20718 non-null object 3 Track 20718 non-null object 4 Album 20718 non-null object 5 Album_type 20718 non-null object 6 Uri 20718 non-null object 7 Danceability 20716 non-null float64 8 Energy 20716 non-null float64 9 Key 20716 non-null float64 10 Loudness 20716 non-null float64 11 Speechiness 20716 non-null float64 12 Acousticness 20716 non-null float64 13 Instrumentalness 20716 non-null float64 14 Liveness 20716 non-null float64 15 Valence 20716 non-null float64 16 Tempo 20716 non-null float64 17 Duration_ms 20716 non-null float64 18 Url_youtube 20248 non-null object 19 Title 20248 non-null object 20 Channel 20248 non-null object 21 Views 20248 non-null float64 22 Likes 20177 non-null float64 23 Comments 20149 non-null float64 24 Description 19842 non-null object 25 Licensed 20248 non-null object 26 official_video 20248 non-null object 27 Stream 20142 non-null float64 dtypes: float64(15), int64(1), object(12) memory usage: 4.4+ MB
# Collect the column labels into a plain Python list and print them.
sütun_adları = list(df.columns)
print(f"Sütun Adları: {sütun_adları}")
Sütun Adları: ['Unnamed: 0', 'Artist', 'Url_spotify', 'Track', 'Album', 'Album_type', 'Uri', 'Danceability', 'Energy', 'Key', 'Loudness', 'Speechiness', 'Acousticness', 'Instrumentalness', 'Liveness', 'Valence', 'Tempo', 'Duration_ms', 'Url_youtube', 'Title', 'Channel', 'Views', 'Likes', 'Comments', 'Description', 'Licensed', 'official_video', 'Stream']
# Compute descriptive statistics for the float64 columns only
# (integer columns are not part of this selection).
numerical_columns = df.select_dtypes(include='float64')
numerical_description = numerical_columns.describe()
print(numerical_description)
Danceability Energy Key Loudness Speechiness \
count 20716.000000 20716.000000 20716.000000 20716.000000 20716.000000
mean 0.619777 0.635250 5.300348 -7.671680 0.096456
std 0.165272 0.214147 3.576449 4.632749 0.111960
min 0.000000 0.000020 0.000000 -46.251000 0.000000
25% 0.518000 0.507000 2.000000 -8.858000 0.035700
50% 0.637000 0.666000 5.000000 -6.536000 0.050500
75% 0.740250 0.798000 8.000000 -4.931000 0.103000
max 0.975000 1.000000 11.000000 0.920000 0.964000
Acousticness Instrumentalness Liveness Valence \
count 20716.000000 20716.000000 20716.000000 20716.000000
mean 0.291535 0.055962 0.193521 0.529853
std 0.286299 0.193262 0.168531 0.245441
min 0.000001 0.000000 0.014500 0.000000
25% 0.045200 0.000000 0.094100 0.339000
50% 0.193000 0.000002 0.125000 0.537000
75% 0.477250 0.000463 0.237000 0.726250
max 0.996000 1.000000 1.000000 0.993000
Tempo Duration_ms Views Likes Comments \
count 20716.000000 2.071600e+04 2.024800e+04 2.017700e+04 2.014900e+04
mean 120.638340 2.247176e+05 9.393782e+07 6.633411e+05 2.751899e+04
std 29.579018 1.247905e+05 2.746443e+08 1.789324e+06 1.932347e+05
min 0.000000 3.098500e+04 0.000000e+00 0.000000e+00 0.000000e+00
25% 97.002000 1.800095e+05 1.826002e+06 2.158100e+04 5.090000e+02
50% 119.965000 2.132845e+05 1.450110e+07 1.244810e+05 3.277000e+03
75% 139.935000 2.524430e+05 7.039975e+07 5.221480e+05 1.436000e+04
max 243.372000 4.676058e+06 8.079649e+09 5.078865e+07 1.608314e+07
Stream
count 2.014200e+04
mean 1.359422e+08
std 2.441321e+08
min 6.574000e+03
25% 1.767486e+07
50% 4.968298e+07
75% 1.383581e+08
max 3.386520e+09
# Flag rows that exactly repeat an earlier row (one boolean per row).
# NOTE(review): 'Unnamed: 0' holds a distinct value on every row, so no
# full-row duplicate can be found while that column is present. Consider
# checking a subset of columns and summarising with .sum() — confirm intent.
df.duplicated()
0 False
1 False
2 False
3 False
4 False
...
20713 False
20714 False
20715 False
20716 False
20717 False
Length: 20718, dtype: bool
# Count the missing values in each column.
df.isnull().sum()
Unnamed: 0 0 Artist 0 Url_spotify 0 Track 0 Album 0 Album_type 0 Uri 0 Danceability 2 Energy 2 Key 2 Loudness 2 Speechiness 2 Acousticness 2 Instrumentalness 2 Liveness 2 Valence 2 Tempo 2 Duration_ms 2 Url_youtube 470 Title 470 Channel 470 Views 470 Likes 541 Comments 569 Description 876 Licensed 470 official_video 470 Stream 576 dtype: int64
# Count the distinct values in each column.
df.nunique()
Unnamed: 0 20718 Artist 2079 Url_spotify 2079 Track 17841 Album 11937 Album_type 3 Uri 18862 Danceability 898 Energy 1268 Key 12 Loudness 9417 Speechiness 1303 Acousticness 3138 Instrumentalness 4012 Liveness 1536 Valence 1293 Tempo 15024 Duration_ms 14690 Url_youtube 18154 Title 18146 Channel 6714 Views 19245 Likes 17939 Comments 10485 Description 17395 Licensed 2 official_video 2 Stream 18461 dtype: int64
# Remove the columns that the rest of the analysis does not use.
unused_columns = ['Unnamed: 0', 'Url_spotify', 'Uri', 'Url_youtube', 'Title', 'Description']
df.drop(columns=unused_columns, inplace=True)

# Discard every row that still contains at least one missing value.
df.dropna(inplace=True)

# Confirm that no missing values remain.
df.isnull().sum()
Artist 0 Track 0 Album 0 Album_type 0 Danceability 0 Energy 0 Key 0 Loudness 0 Speechiness 0 Acousticness 0 Instrumentalness 0 Liveness 0 Valence 0 Tempo 0 Duration_ms 0 Channel 0 Views 0 Likes 0 Comments 0 Licensed 0 official_video 0 Stream 0 dtype: int64
# Print the column labels currently present in the DataFrame.
print(df.columns)
Index(['Artist', 'Track', 'Album', 'Album_type', 'Danceability', 'Energy',
'Key', 'Loudness', 'Speechiness', 'Acousticness', 'Instrumentalness',
'Liveness', 'Valence', 'Tempo', 'Duration_ms', 'Channel', 'Views',
'Likes', 'Comments', 'Licensed', 'official_video', 'Stream'],
dtype='object')
import plotly.graph_objects as go
import pandas as pd

# Mean Energy, Danceability and Valence for every artist.
artist_metrics = (
    df.groupby('Artist')[['Energy', 'Danceability', 'Valence']]
    .mean()
    .reset_index()
)

# Keep the 20 artists with the highest mean Energy; all three charts below
# show this same selection.
top_20_artists = artist_metrics.sort_values(by='Energy', ascending=False).head(20)

# Bar colours, one per metric.
colors = ['lightgreen', 'limegreen', 'darkgreen']

# One entry per chart: the column to plot, its axis/legend label and its title.
chart_specs = [
    {'column': 'Energy',
     'label': 'Enerji Seviyesi',
     'title': 'En İyi 20 Sanatçıya Göre Ortalama Enerji Seviyesi'},
    {'column': 'Danceability',
     'label': 'Dans Edilebilirlik',
     'title': 'En İyi 20 Sanatçıya Göre Ortalama Dans Edilebilirlik'},
    {'column': 'Valence',
     'label': 'Müzikal Pozitiflik',
     'title': 'En İyi 20 Sanatçıya Göre Ortalama Müzikal Pozitiflik'},
]

# Build one bar chart per metric, labelling each bar with its value
# formatted to two decimals.
metric_figures = []
for spec, bar_color in zip(chart_specs, colors):
    metric_values = top_20_artists[spec['column']]
    metric_fig = go.Figure()
    metric_fig.add_trace(go.Bar(
        x=top_20_artists['Artist'],
        y=metric_values,
        name=spec['label'],
        marker_color=bar_color,
        text=metric_values,
        textposition='outside',
        texttemplate='%{text:.2f}',
    ))
    metric_fig.update_layout(
        title=spec['title'],
        xaxis_title='Sanatçı',
        yaxis_title=spec['label'],
    )
    metric_figures.append(metric_fig)

# Keep the individual figure names available for later cells.
fig1, fig2, fig3 = metric_figures

# Render the figures in order: Energy, Danceability, Valence.
for metric_fig in metric_figures:
    metric_fig.show()
import plotly.graph_objects as go

# Count the rows per album type and expose the result as a two-column frame.
album_types = (
    df['Album_type']
    .value_counts()
    .rename_axis('Album Type')
    .reset_index(name='Count')
)

# Slice labels and sizes as plain lists.
labels = album_types['Album Type'].to_list()
sizes = album_types['Count'].to_list()

# Donut-style pie: label and percentage on each slice, text laid out radially.
pie_trace = go.Pie(
    labels=labels,
    values=sizes,
    textinfo='label+percent',
    insidetextorientation='radial',
    hole=0.3,
    marker=dict(colors=['orange', 'darkgreen', 'lightgreen']),
)
fig = go.Figure(data=[pie_trace])

# Set the chart title.
fig.update_layout(title='Albüm Türlerine Göre Dağılım')

# Render the chart.
fig.show()
import plotly.graph_objects as go
import numpy as np

# Scatter plot of views against likes, one marker per row.
fig = go.Figure(data=go.Scatter(x=df['Views'], y=df['Likes'],
                                mode='markers',
                                marker=dict(color='green', opacity=0.7,
                                            line=dict(color='black', width=1))))

# Fit a first-degree (linear) least-squares trend of likes on views.
z = np.polyfit(df['Views'], df['Likes'], 1)
p = np.poly1d(z)

# A straight line is fully described by its two end points. Evaluating the
# fit on every row, in unsorted order, makes the drawn path double back over
# itself: overlapping segments fill each other's dash gaps, so the dashed
# style is lost, and the figure carries a second copy of the whole x column.
trend_x = np.array([df['Views'].min(), df['Views'].max()])
fig.add_trace(go.Scatter(x=trend_x, y=p(trend_x), mode='lines',
                         line=dict(color='orange', dash='dash')))

# Titles, hidden legend and a white plot background.
fig.update_layout(title='Görüntüler ile Beğeniler Arasındaki İlişki',
                  xaxis_title='Görüntüler',
                  yaxis_title='Beğeniler',
                  showlegend=False,
                  plot_bgcolor='white')

# Render the chart.
fig.show()
import plotly.graph_objects as go
import numpy as np

# Scatter plot of views against comments, one marker per row.
fig = go.Figure(data=go.Scatter(x=df['Views'], y=df['Comments'],
                                mode='markers',
                                marker=dict(color='darkorange', opacity=0.7,
                                            line=dict(color='black', width=1))))

# Fit a first-degree (linear) least-squares trend of comments on views.
z = np.polyfit(df['Views'], df['Comments'], 1)
p = np.poly1d(z)

# A straight line is fully described by its two end points. Evaluating the
# fit on every row, in unsorted order, makes the drawn path double back over
# itself: overlapping segments fill each other's dash gaps, so the dashed
# style is lost, and the figure carries a second copy of the whole x column.
trend_x = np.array([df['Views'].min(), df['Views'].max()])
fig.add_trace(go.Scatter(x=trend_x, y=p(trend_x), mode='lines',
                         line=dict(color='green', dash='dash')))

# Titles, hidden legend and a white plot background.
fig.update_layout(title='Görüntüler ile Yorumlar Arasındaki İlişki',
                  xaxis_title='Görüntüler',
                  yaxis_title='Yorumlar',
                  showlegend=False,
                  plot_bgcolor='white')

# Render the chart.
fig.show()
import plotly.graph_objects as go
import pandas as pd

# Pairwise correlations between the numeric columns, rounded to two decimals.
numeric_df = df.select_dtypes(include='number')
correlation_matrix = numeric_df.corr().round(2)

# Heat map with one cell per pair of columns.
heatmap_trace = go.Heatmap(
    z=correlation_matrix.values,
    x=correlation_matrix.columns,
    y=correlation_matrix.index,
    colorscale='Oranges',
    colorbar=dict(title='Korelasyon'),
)
fig = go.Figure(data=heatmap_trace)

# Write every coefficient on top of its cell. Values above 0.95 are drawn in
# white, all others in black.
for i, row_label in enumerate(correlation_matrix.index):
    for j, column_label in enumerate(correlation_matrix.columns):
        value = correlation_matrix.values[i, j]
        color = 'white' if value > 0.95 else 'black'
        fig.add_annotation(
            x=column_label,
            y=row_label,
            text=str(value),
            showarrow=False,
            font=dict(color=color, size=10),
            xanchor='center',
            yanchor='middle',
        )

# Set the chart title.
fig.update_layout(title='Kolonlar Arasındaki Korelasyon Matrisi')

# Render the chart.
fig.show()